In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
In [2]:
# Location of the diamonds dataset. The default is the original author's local
# path; set the DIAMONDS_CSV environment variable to run the notebook on another
# machine without editing this cell.
DIAMONDS_CSV = os.environ.get(
    "DIAMONDS_CSV", r"C:\Users\Manikanta\Downloads\diamonds.csv"
)

# Use the first CSV column as the row index rather than as a data column.
df = pd.read_csv(DIAMONDS_CSV, index_col=[0])
df
Out[2]:
carat cut color clarity depth table price x y z
1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
... ... ... ... ... ... ... ... ... ... ...
53936 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 3.50
53937 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
53938 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64

53940 rows × 10 columns

In [3]:
# Summarize column dtypes, non-null counts and memory usage of the raw frame.
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 53940 entries, 1 to 53940
Data columns (total 10 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   carat    53940 non-null  float64
 1   cut      53940 non-null  object 
 2   color    53940 non-null  object 
 3   clarity  53940 non-null  object 
 4   depth    53940 non-null  float64
 5   table    53940 non-null  float64
 6   price    53940 non-null  int64  
 7   x        53940 non-null  float64
 8   y        53940 non-null  float64
 9   z        53940 non-null  float64
dtypes: float64(6), int64(1), object(3)
memory usage: 4.5+ MB
In [4]:
# Count rows that repeat an earlier row in every column.
df.duplicated().sum()
Out[4]:
146
In [5]:
# Work on a de-duplicated frame from here on; `df` keeps the raw rows as loaded.
a=df.drop_duplicates()
a
Out[5]:
carat cut color clarity depth table price x y z
1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
... ... ... ... ... ... ... ... ... ... ...
53936 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 3.50
53937 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
53938 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64

53794 rows × 10 columns

In [6]:
# Sanity check: no duplicate rows should remain after drop_duplicates().
a.duplicated().sum()
Out[6]:
0
In [7]:
# Count missing values per column.
a.isnull().sum()
Out[7]:
carat      0
cut        0
color      0
clarity    0
depth      0
table      0
price      0
x          0
y          0
z          0
dtype: int64
In [8]:
# Preview the first two rows of the de-duplicated frame `a`, which is the frame
# every later cell plots from (previously this peeked at the raw `df`).
a.head(2)
Out[8]:
carat cut color clarity depth table price x y z
1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31

- Bar plot of color vs. price¶

In [9]:
# List the column names of the de-duplicated frame.
a.columns
Out[9]:
Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',
       'z'],
      dtype='object')
In [10]:
# Animated bar chart of price per color: bars are tinted by color, each
# distinct carat value becomes one animation frame, and depth is used as the
# bold hover title.
fig = px.bar(
    a,
    x="color",
    y="price",
    color="color",
    hover_name="depth",
    animation_frame="carat",
)
fig.show()
In [11]:
# Distinct values of the `cut` column.
a['cut'].unique()
Out[11]:
array(['Ideal', 'Premium', 'Good', 'Very Good', 'Fair'], dtype=object)
In [12]:
# Distinct values of the `color` column.
a['color'].unique()
Out[12]:
array(['E', 'I', 'J', 'H', 'F', 'G', 'D'], dtype=object)
In [13]:
# Distinct values of the `clarity` column.
a['clarity'].unique()
Out[13]:
array(['SI2', 'SI1', 'VS1', 'VS2', 'VVS2', 'VVS1', 'I1', 'IF'],
      dtype=object)
In [14]:
# List the column names of the de-duplicated frame.
a.columns
Out[14]:
Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',
       'z'],
      dtype='object')

- Bar plot of cut vs. price¶

In [15]:
# Animated bar chart of price per cut: bars are tinted by color, each distinct
# clarity value becomes one animation frame, and depth is used as the bold
# hover title.
fig = px.bar(
    a,
    x="cut",
    y="price",
    color="color",
    hover_name="depth",
    animation_frame="clarity",
)
fig.show()
  • scatter plot
In [16]:
# List the column names of the de-duplicated frame.
a.columns
Out[16]:
Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',
       'z'],
      dtype='object')
In [17]:
# Preview the first five rows of the de-duplicated frame.
a.head()
Out[17]:
carat cut color clarity depth table price x y z
1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75

- Scatter¶

In [18]:
# Animated scatter of price against carat, one frame per value of `color`.
# animation_group matches rows with equal depth across frames, marker size is
# taken from `table`, and carat is used as the bold hover title.
fig = px.scatter(
    a,
    x="carat",
    y="price",
    animation_frame="color",
    animation_group="depth",
    size="table",
    color="color",
    hover_name="carat",
    # Title typo fixed: "crat" -> "carat".
    title="Scatter plot for carat & price where animationFrame :-color & animationGroup :-depth",
)
fig.show()
In [19]:
# Animated scatter of price against carat, one frame per value of `color`;
# depth is used as the bold hover title.
fig = px.scatter(
    a,
    x="carat",
    y="price",
    animation_frame="color",
    color="color",
    hover_name="depth",
    # Title typo fixed: "crat" -> "carat".
    title="Scatter plot for carat & price where animationFrame :-color",
)
fig.show()
  • Strip plot
In [20]:
# Strip plot of `table` for each cut, tinted by color and animated with one
# frame per value of `color`; depth is used as the bold hover title.
fig = px.strip(
    a,
    x="cut",
    y="table",
    color="color",
    hover_name="depth",
    animation_frame="color",
    title="Strip plot for cut & table where animationFrame :-color",
)
fig.show()
In [21]:
# Strip plot of `table` for each cut, tinted by color and animated with one
# frame per value of `clarity`; depth is used as the bold hover title.
fig = px.strip(
    a,
    x="cut",
    y="table",
    color="color",
    hover_name="depth",
    animation_frame="clarity",
    title="Strip plot for cut & table where animationFrame :-clarity",
)
fig.show()
In [22]:
# List the column names of the de-duplicated frame.
a.columns
Out[22]:
Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',
       'z'],
      dtype='object')
In [23]:
# Preview the first row of the de-duplicated frame.
a.head(1)
Out[23]:
carat cut color clarity depth table price x y z
1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
In [24]:
# Animated scatter of price for each cut, tinted by color, with one frame per
# value of `clarity`; depth is used as the bold hover title. No title is set.
fig = px.scatter(
    a,
    x="cut",
    y="price",
    color="color",
    hover_name="depth",
    animation_frame="clarity",
)
fig.show()
  • Histogram
In [25]:
# Preview the first five rows of the de-duplicated frame.
a.head()
Out[25]:
carat cut color clarity depth table price x y z
1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
In [26]:
# Animated histogram binned on carat, one frame per value of `color`.
# Because y is given, each bar aggregates `depth` over its carat bin
# (px.histogram sums y by default) rather than counting rows.
fig = px.histogram(
    a,
    x="carat",
    y="depth",
    animation_frame="color",
    color="color",
    hover_name="price",
    # Title fixed: the second variable ("depth") was missing after the "&".
    title="Histogram plot for carat & depth where animationFrame :-color",
)
fig.show()
  • line chart
In [27]:
# Preview the first row of the de-duplicated frame.
a.head(1)
Out[27]:
carat cut color clarity depth table price x y z
1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
In [28]:
# Animated line chart of price against table: one line per color, one frame
# per value of `cut`, with a marker drawn at every point.
# NOTE(review): `a` is not sorted by `table` here, so each line joins points in
# row order -- confirm that is the intended picture.
fig = px.line(
    a,
    x="table",
    y="price",
    color="color",
    markers=True,
    hover_name="price",
    animation_frame="cut",
    title="line chart for table & price where animationFrame :-cut",
)
fig.show()

- Box plot¶

In [29]:
# Preview the first row of the de-duplicated frame.
a.head(1)
Out[29]:
carat cut color clarity depth table price x y z
1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
In [30]:
# Animated box plot of `table` for each clarity value, split by color, with one
# frame per value of `cut`; price is used as the bold hover title.
fig = px.box(
    a,
    x="clarity",
    y="table",
    color="color",
    hover_name="price",
    animation_frame="cut",
    title="Box plot for clarity & table where animationFrame :-cut",
)
fig.show()
In [31]:
# Animated box plot of `table` for each color, one frame per value of `cut`.
# points="all" draws every observation next to its box, not only the outliers.
fig = px.box(
    a,
    x="color",
    y="table",
    color="color",
    points="all",
    hover_name="price",
    animation_frame="cut",
    title="Box plot for color & table where animationFrame :-cut",
)
fig.show()

- Violin plot¶

In [32]:
# Preview the first two rows of the de-duplicated frame.
a.head(2)
Out[32]:
carat cut color clarity depth table price x y z
1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
In [33]:
# Animated violin plot of price, one violin per color and one frame per value
# of `cut`. points="all" overlays every observation on the violins.
fig = px.violin(
    a,
    y="price",
    animation_frame="cut",
    color="color",
    hover_name="price",
    points="all",
    # Title fixed: it was copied from the box-plot cell ("Box plot for color &
    # table") and did not describe this violin plot of price.
    title="Violin plot for price by color where animationFrame :-cut",
)
fig.show()
In [34]:
# Animated violin plot of price, one violin per color and one frame per value
# of `cut`. `points` is left at its default, so not every observation is drawn.
fig = px.violin(
    a,
    y="price",
    animation_frame="cut",
    color="color",
    hover_name="price",
    # Title fixed: it was copied from the box-plot cell ("Box plot for color &
    # table"); also "with out" -> "without".
    title="Violin plot for price by color where animationFrame :-cut without points",
)
fig.show()

- joint plot¶

In [35]:
# Joint plot: scatter of price against carat with a marginal histogram on each
# axis, tinted by color and animated with one frame per value of `cut`.
fig = px.scatter(
    a,
    x="carat",
    y="price",
    color="color",
    hover_name="price",
    animation_frame="cut",
    marginal_x="histogram",
    marginal_y="histogram",
    title="Joint Plot",
)
fig.show()

Bubble plot¶

In [36]:
# Bubble plot: scatter of price against table where marker size is taken from
# `depth`; tinted by color and animated with one frame per value of `cut`.
fig = px.scatter(
    a,
    x="table",
    y="price",
    size="depth",
    color="color",
    hover_name="price",
    animation_frame="cut",
    title="Bubble plot for table &price where animationFrame :-cut",
)

fig.show()

- Pair plot¶

In [37]:
# Pair plot: scatter matrix over the x, y and z columns, tinted by color.
fig = px.scatter_matrix(
    a,
    dimensions=["x", "y", "z"],
    color="color",
    title="Pair Plot",
)
fig.show()
C:\Users\Manikanta\Documents\Custom Office Templates\lib\site-packages\plotly\express\_core.py:279: FutureWarning:

iteritems is deprecated and will be removed in a future version. Use .items instead.

In [38]:
# Preview the first five rows of the de-duplicated frame.
a.head()
Out[38]:
carat cut color clarity depth table price x y z
1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: